from AuxLibraries import *
import GWPs
from matplotlib import colors,cm
import local_utils as ut
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
import auxiliars as aux
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import sector_maps as smap
import carbon_price as cpg
import TransitionRISKS as tnr

colorpalette = ['#f26419', '#f6ae2d', '#86bbd8', '#33658a', '#2f4858', '#90BE6D', '#43AA8B', '#577590']
GWPcols =  ['#006076', '#ff0000', '#4d0000']




#%%
class FIGURE_():
    def __int__():
        pass

    def FIGURE1(self, X, Z, save_plot=False):
        def get_proportions(X, cols):
            s = X[X.Gas == 'CH4'][[cols, 'ReportingYear', 'cdp_id']].groupby([cols, 'ReportingYear']).count().sort_values(by = ['cdp_id']).reset_index().pivot(index='ReportingYear', columns=cols, values='cdp_id')
            s = s.div(s.sum(axis=1), axis=0)
            if cols == 'GWPV':
                s = s[['Second', 'Third', 'Fourth', 'Fifth', 'Sixth', 'Other']]#, 'Question not applicable', 'Hidden Answer']]
            if cols == 'GWPH':
                s = s[['H=20', 'H=50', 'H=100', 'Other']]#, 'Question not applicable', 'Hidden Answer']]
            s = s.rename(columns = {'Second': 'Second AR',
                                    'Third': 'Third AR',
                                    'Fifth': 'Fifth AR',
                                     'Sixth': 'Sixth AR',                       
                                     'H=20': '20 year', 'H=50': '50 year', 'H=100': '100 year',
                                    'Question not applicable': "Not Applicable", 
                                    'Hidden Answer': 'Hidden'})
            return s
        
        #=== Panel a
        fig, (ax0A, ax0) = plt.subplots(2, 1, sharex=True, figsize = (16,12))
        fig.subplots_adjust(hspace=0.15)  # adjust space between Axes

        X['mrg'] = X['Gas']+'-'+X['GWP_adj']
        dtx = X.merge(Z)
        dtx = dtx[dtx.Gas == 'CH4']
        print('#=====\nStatistics for panel A')
        print('N for the counts in panel a:', len(X))
        print('N for the GWP in panel a:', len(dtx))
        print('Number of companies reporting methane and an available IPCC GWP value:', len(dtx.cdp_id.unique()))
        
        relevant_sample = dtx.copy()
        
        print('#=====')
        m = dtx[['ReportingYear', 'ConversionFactor']].groupby('ReportingYear').mean().rename(columns = {'ConversionFactor': 'Sample Average'})
        s = 1.96*dtx[['ReportingYear', 'ConversionFactor']].groupby('ReportingYear').apply(ut.utils().stdErr).rename(columns = {'ConversionFactor': 'Sample Average'}).drop(columns = ['ReportingYear'])
        m.plot( ax = ax0, marker = 'o',  ls='-.', yerr = s, capsize = 4, ms = 10, color = '#42505f', lw  =  2)
        ax0.axvline(x = 2021, c= 'k', ymax = 80)
        count = 0
        for n in [27,28.4,29.8]:
            dtx0 = pd.DataFrame([[2021,28], [2022,n], [2023,n]], columns = ['ReportingYear', 'Most recent AR 100-year']).set_index('ReportingYear')
            dtx0.plot(marker = '', ms = 10, ax = ax0, color  =GWPcols[count], ls = '-', lw=2, alpha = 1)
            count+=1
            dtx0 = pd.DataFrame([[2014,28], [2015,28], [2016,28], [2017,28], [2018,28], [2019,28], [2020,28], [2021,28]], columns = ['ReportingYear', 'Most recent AR 100-year']).set_index('ReportingYear')
            dtx0.plot(marker = '', ms = 10, ax = ax0, color  = 'k', ls = '-',lw=2,  alpha = 1)
    
        ax0.set_xlabel('Reporting year')
        ax0.set_ylabel('GWP, Methane', y=1, labelpad = 12)
        ax0.legend().remove() #(loc = 'center', bbox_to_anchor=(0.5, 1.1),ncol=2)
        line1    = Line2D([0], [0], label='Most recent GWP', color='gray', ls ='-', marker = '')
        line2    = Line2D([0], [0], label='Sample Average', color='#42505f', ls ='--', marker = 'o')
        ax0.legend(handles=[line1,line2], loc = 'center', bbox_to_anchor = (0.5,2.2), ncol = 2)
        ax0.margins(x=0)
        ax0.spines[['top']].set_visible(False)
        ax0.margins(x=0)
        ax0.text(2014, 29.25, r'GWP$_{100}$')
        ax0.annotate("", xy=(2014.5,28), xytext=(2014.5,29),
                    arrowprops=dict(arrowstyle="->"))

        ax0B   = ax0.twinx()
        count = pd.read_csv('local_data/CDP_respondents.csv').drop(columns = ['Unnamed: 0']).set_index('ReportingYear')
        countGAS = X[X.Gas.isin(['CH4', 'CO2', 'N2O', 'HFCs', 'PFCs','SF6','NF3' ])][['cdp_id', 'ReportingYear']].groupby('ReportingYear').nunique()
        countCH4 = X[X.Gas == 'CH4'][['cdp_id', 'ReportingYear']].groupby('ReportingYear').nunique()
        c = pd.concat((count, countGAS, countCH4), axis = 1)
        c.columns = ['Reporting to CDP', 'Explicit disclosure of individual gases', 'Reporting on CH$_4$']
        c.plot(kind = 'area', stacked = False, ax = ax0B,color = ['gray', '#c1440e', '#006d5b'], alpha = 0.4)
        ax0B.legend(loc = 'center', bbox_to_anchor=(0.45, 1.2),ncol=1)
        ax0B.margins(x=0.01)
        ax0B.set_ylabel('Number of \n reporting companies')
        ax0B.set_xlabel('Reporting year')
        ax0B.spines[['top']].set_visible(False)
        ax0B.margins(x=0)
        count = 0
        for n in [79.7, 81.1, 82.5]:
            dtx0A = pd.DataFrame([[2021,84], [2022,n], [2023,n]], columns = ['ReportingYear', 'Most recent AR 100-year']).set_index('ReportingYear')
            dtx0A.plot(marker = '', ms = 10, ax = ax0A, color  =GWPcols[count], lw=2, ls = '-', alpha = 1, legend=False)
            count+=1
            dtx0A = pd.DataFrame([[2014,84], [2015,84], [2016,84], [2017,84], [2018,84], [2019,84], [2020,84], [2021,84]], columns = ['ReportingYear', 'Most recent AR 100-year']).set_index('ReportingYear')
            dtx0A.plot(marker = '', ms = 10, ax = ax0A, color  = 'k', ls = '-', lw=2, alpha = 1, legend=False)
        ax0A.text(2022, 80., 'non-fossil', fontsize = 22)
        ax0A.text(2022.1, 81.5, 'average', fontsize = 22)
        ax0A.text(2022.25, 82.8, 'fossil', fontsize = 22)
        ax0A.text(2014, 85.5, r'GWP$_{20}$')
        ax0A.annotate("", xy=(2014.5,84), xytext=(2014.5,85.4),
                    arrowprops=dict(arrowstyle="->"))
        ax0A.axvline(x = 2021, c= 'k')
        ax0A.spines[['right']].set_visible(False)
        ax0A.margins(x=0.01)

        ax0A.set_ylim(79,85)  # outliers only
        ax0.set_ylim(23.5, 30)  # most of the data
        ax0A.spines.bottom.set_visible(False)
        ax0.spines.top.set_visible(False)
        ax0A.spines.top.set_visible(False)
        ax0.tick_params(labeltop=False)  # don't put tick labels at the top
        ax0.xaxis.tick_bottom()

        ax0A.text(-0.05, 1.16, 'a', transform=ax0A.transAxes,
          fontsize=24, fontweight='bold', va='top', ha='right')
        d = .5  # proportion of vertical to horizontal extent of the slanted line
        kwargs = dict(marker=[(-1, -d), (1, d)], markersize=12,
                      linestyle="none", color='k', mec='k', mew=1, clip_on=False)

        ax0A.plot([0, 1], [0, 0], transform=ax0A.transAxes, **kwargs)
        ax0.plot([0, 1], [1, 1], transform=ax0.transAxes, **kwargs)



        if save_plot:
            plt.savefig('Figures/Figure1A.pdf', dpi = 300, bbox_inches = 'tight')  
            
        #=========
        #=== Panel b
        #=====
        print('#=====\nStatistics for panel B')
        print('Number of observations:', len(X[X.Gas == 'CH4']))
        print('Number of companies reporting methane emissions for panel b and c (these include any type of GWP):', len(X[X.Gas == 'CH4'].cdp_id.unique()))
        print('#=====')
        #=====

        fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize = (16,12))

        cols = 'GWPV'
        arcols = ['#96897b', '#d6d4a0']
        ax1.axvspan(-1,7.5, color=arcols[0], alpha=0.4, lw=0)
        ax1.axvspan(7.5,9.5, color=arcols[1], alpha=0.4, lw=0)
        #====
        AR5 = mpatches.Patch(color=arcols[0], label='AR5')
        AR6 = mpatches.Patch(color=arcols[1], label='AR6')
        legend1 = ax1.legend(handles=[AR5,AR6], # title = 'Most recent GWPs', 
                             loc='center', bbox_to_anchor = (0.5,1.15),ncol = 2)
        
        s = get_proportions(X, cols)
        s.plot(kind = 'bar', stacked = True, ax = ax1, rot=0,width=0.5,color=colorpalette, edgecolor='k', lw=1.2, alpha = 0.8)
        legend0 = ax1.legend(loc = 'center left', bbox_to_anchor = (0.98,0.5))
        ax1.set_xlabel('')
        ax1.set_ylabel('Fraction of firms')
        ax1.add_artist(legend0)
        ax1.add_artist(legend1)
        ax1.text(-0.1, 1.25, 'b', transform=ax1.transAxes,
          fontsize=24, fontweight='bold', va='top', ha='right')
        ax1.spines[['right', 'top']].set_visible(False)

        #=== Panel c
        cols = 'GWPH'
        ax2.axvspan(-1,7.5, color=arcols[0], alpha=0.4, lw=0)
        ax2.axvspan(7.5,9.5, color=arcols[1], alpha=0.4, lw=0)
        s = get_proportions(X, cols)
        s.plot(kind = 'bar', stacked = True, ax = ax2, rot=0,edgecolor='k', lw=1.2, width=0.5,color=colorpalette)
        ax2.legend(loc = 'center left', bbox_to_anchor = (1,0.5))
        plt.xlabel('')
        ax2.set_ylabel('Fraction of firms')
        ax2.set_xlabel('Reporting year')
        ax2.text(-0.1, 1.15, 'c', transform=ax2.transAxes,
          fontsize=24, fontweight='bold', va='top', ha='right')
        ax2.spines[['right', 'top']].set_visible(False)
        

        if save_plot:
            plt.savefig('Figures/Figure1A2.pdf', dpi = 300, bbox_inches = 'tight')  

        return relevant_sample
    
    #================================================================
    #================================================================
    
    def FIGURE1B(self, db, db2, Z, ax, GASTYPE = 'CH4', HORIZON = 100, labels = ['d', 'e', 'f'], save_plot=False):

        count = 0
        t = []
        if HORIZON == 100:
            latest_methane_list = [27,28.4,29.8]
            linest = '-'
        else:
            latest_methane_list = [79.7, 81.1, 82.5]
            linest = '-'

        print('Horizon:', HORIZON)
        for latest_methane in latest_methane_list:
            #=== This is the main calculation of the counterfactual
            A = GWPs.Counterfactual(db, Z, [2014,2015,2016,2017, 2018,2019,2020,2021], 'AR5',horizon=HORIZON) 
            B = GWPs.Counterfactual(db, Z, [2022, 2023], 'AR6', latest_methane,horizon=HORIZON) 
            X = pd.concat((A, B))       
            X = X[X.Gas == GASTYPE]

            print('Effective number of firms:', len(X.cdp_id.unique()))
            X['mrg'] = X['cdp_id'].astype(int).astype(str)+X['ReportingYear'].astype(str)+X['Gas']
            X = X.groupby('mrg').last().reset_index(drop=True)
            print('Number of observations for panel a-c:', len(X))
    
            #=== Cumulative sum --- This is the methane conversion gap
            s = X[['Delta', 'ReportingYear']].groupby('ReportingYear').sum()
            s/=1e6
            s.cumsum().plot(marker = 'o', ls = linest,   ms= 8, lw = 3, c = GWPcols[count], ax = ax, legend=False)
            t.append(s.cumsum().iloc[-1].iloc[0])
            print(latest_methane, s.cumsum().iloc[-1].iloc[0])
            ax.set_ylabel(r'Conversion Gap, cumulative (MtCO$_2$e)')
            ax.set_xlabel('')
            ax.set_xlabel('Reporting year')
            #plt.axhline(y=0,  xmax= 1, c = 'k', ls = '-.', lw = 1)
            count+=1
        if HORIZON == 100:
            plt.text(2022.2,t[0]+5, 'GWP$_{100}$ = '+str(latest_methane_list[0]), rotation = 0,fontsize=14)
            plt.text(2022.5,t[1]-4.5, 'GWP$_{100}$ = '+str(latest_methane_list[1]), rotation = -15,fontsize=14)
            plt.text(2022.5,t[2]-12, 'GWP$_{100}$ = '+str(latest_methane_list[2]), rotation = -28,fontsize=14)
            props = dict(boxstyle='round', facecolor='#7ac2e0', alpha=0.5)
            plt.text(2013.2, 0.6, 'Counterfactual = GWP$_{100}$', bbox=props) #,fontsize=18)
        elif HORIZON == 20:
            props = dict(boxstyle='round', facecolor='#7ac2e0', alpha=0.5)
            plt.text(2013.8, 0.5, 'Counterfactual = GWP$_{20}$', bbox=props) #,fontsize=18)

        ax.text(-0.1, 1.3, labels[0], transform=ax.transAxes,
          fontsize=24, fontweight='bold', va='top', ha='right')
        ax.spines[['right', 'top']].set_visible(False)

        #=== Sector colors and ordering
        cmap =  cm.get_cmap('coolwarm')
        cols = [colors.rgb2hex(cmap(i)) for i in np.linspace(0,1,10)]
        #==============================
        #=== Make the insets
        count = 0 
        left, bottom, width, height = [.5, 0.75, 0.4, 0.6]
        ax2 = ax.inset_axes([left, bottom, width, height])
        if HORIZON == 100: 
            latest_methane = 28.4
        elif HORIZON == 20: 
            latest_methane = 81.1
        A = GWPs.Counterfactual(db2, Z, [2014,2015,2016,2017, 2018,2019,2020,2021], 'AR5',horizon=HORIZON) 
        B = GWPs.Counterfactual(db2, Z, [2022, 2023], 'AR6', latest_methane,horizon=HORIZON) 
        X = pd.concat((A, B))       
        X = X[X.Gas == GASTYPE]
        X['mrg'] = X['cdp_id'].astype(int).astype(str)+X['ReportingYear'].astype(str)+X['Gas']
        X = X.groupby('mrg').last().reset_index(drop=True)
        order = X[['Delta', 'main_sector']].groupby('main_sector').mean().sort_values(by = 'Delta', ascending=False).drop(index = 'Other').index
        #= Cumulative distribution
        order_dict = dict()
        for i in order:
            tmp = X[X.main_sector == i]['Delta']/1e6
            sns.ecdfplot(data = tmp,color=cols[count], ax = ax2, log_scale=False); 
            order_dict[i] = cols[count]
            count+=1
        if HORIZON == 100:
            ax2.legend(labels=list(order), loc = 'center left', bbox_to_anchor  = (1,0.5), fontsize = 18,frameon=False)
        
        ax2.ticklabel_format(axis='x', style='plain', scilimits=(7,7))
        ax2.set_yscale('log')
        ax2.set_xlabel('Conversion Gap, MtCO$_2$e')
        ax2.spines[['right', 'top']].set_visible(False)
        ax2.text(-0.1, 1.16, labels[1], transform=ax2.transAxes,
          fontsize=24, fontweight='bold', va='top', ha='right')
        
        
        
        left, bottom, width, height = [.12, 0.12, 0.32, 0.38]
        ax3 = ax.inset_axes([left, bottom, width, height])   
        count = 0
        for latest_methane in latest_methane_list:
            A = GWPs.Counterfactual(db, Z, [2014,2015,2016,2017, 2018,2019,2020,2021], 'AR5',horizon=HORIZON) 
            B = GWPs.Counterfactual(db, Z, [2022, 2023], 'AR6', latest_methane,horizon=HORIZON) 
            X = pd.concat((A, B))       
            X = X[X.Gas == GASTYPE]
            X['mrg'] = X['cdp_id'].astype(int).astype(str)+X['ReportingYear'].astype(str)+X['Gas']
            X = X.groupby('mrg').last().reset_index(drop=True)
        
            #=== Cumulative sum
            s = X[['Value (Equalised)', 'ReportingYear']].groupby('ReportingYear').sum()
            s/=1e6
            s.index = s.index.astype(int).astype(str)
            s.cumsum().plot(marker = 'o', ms= 8, lw = 3, c = GWPcols[count], ax = ax3, legend=False)
            print('Total cumulative reported Methane (counterfactual):', s.cumsum().iloc[-1])
            ax3.set_ylabel(' ') #'Emission, cumulative, (MtCO$_2$e)')
            plt.xlabel('')
            ax3.set_xlabel('')
            count+=1
        s = X[['Value (tCO2e)', 'ReportingYear']].astype(float).groupby('ReportingYear').sum()
        s/=1e6
        s.index = s.index.astype(int).astype(str)
        s.cumsum().plot(marker = 'o', ms= 8, lw = 3, c = 'gray', ls = '-.', ax = ax3, legend=False)
        print('Total cumulative reported Methane:', s.cumsum().iloc[-1])
        line    = Line2D([0], [0], label='Reported', color='gray', ls ='--', marker = 'o')
        ax3.legend(handles=[line],fontsize=18)
        plt.tight_layout()
        ax3.spines[['right', 'top']].set_visible(False)
        ax3.set_xlabel('')
        ax3.text(-0.1, 1.16, labels[2], transform=ax3.transAxes,
          fontsize=24, fontweight='bold', va='top', ha='right')
    
        if save_plot:
            plt.savefig('Figures/Figure2BA'+str(HORIZON)+'.pdf', dpi = 300, bbox_inches = 'tight')
        return order_dict, X
    
    
    
    def FIGURE1G(self, db, Z, order, ax, delta_type = 'Delta',   HORIZON = 100, save_plot=False, labels = ['d', 'e']):


        if HORIZON == 100: 
            lat_methane = 28.4
        elif HORIZON == 20: 
            lat_methane = 81.1
        print('#======', lat_methane)
        A = GWPs.Counterfactual(db, Z, [2014,2015,2016,2017,2018,2019,2020,2021], 'AR5', horizon=HORIZON) 
        B = GWPs.Counterfactual(db, Z, [2022, 2023], 'AR6',latest_methane=lat_methane, horizon=HORIZON) 
        X = pd.concat((A, B))
        GASTYPE = 'CH4'
        X = X[X.Gas == GASTYPE]

        X['mrg'] = X['cdp_id'].astype(int).astype(str)+X['ReportingYear'].astype(str)+X['Gas']
        X = X.groupby('mrg').last().reset_index(drop=True)
        print('Number of observations for panel d,e:', len(X))
        
        #========= You need a bit of filtering otherwise outliers really scre your mean (1%)
        Xunfiltered = X.copy()
        X = X[X.Delta.abs() < X.Delta.abs().quantile(0.99)]
        #=========
        
    
        sector_type = 'our_sectors'
        cutting_var = 'main_sector'
        X = X.loc[X.main_sector.dropna().index].reset_index(drop=True)
        X['our_sectors'] = X['main_sector']
        X['our_sectors'] = X['our_sectors'].replace(['ICT', 'Health Care'], ['Health + ICT', 'Health + ICT'])
         
        
        
        #===========================
        #========= Average ========= 
        #===========================
        ma = X[X.main_sector.isin(list(order.keys())[-5:][::-1])][['ReportingYear', delta_type]].groupby('ReportingYear').mean()
        sa = X[X.main_sector.isin(list(order.keys())[-5:][::-1])][['ReportingYear', delta_type]].groupby('ReportingYear').apply(ut.utils().stdErr).drop(columns = ['ReportingYear'])
        ma.columns = ['']
        sa.columns = ['']
        if delta_type == 'Delta':
            ma/=1e6
            sa/=1e6
        ma.plot(c='gray', lw = 2, ls = '-.', marker = 'o', yerr=  sa, capsize = 4, ax = ax,legend=False)
        ax.fill_between(sa.index, (ma+sa).values.ravel().tolist(), (ma-sa).values.ravel().tolist(),alpha = 0.5,color='gray')
    
        #============================
        #========= Granular ========= 
        #============================
        m = X[['ReportingYear', cutting_var, delta_type]].groupby(['ReportingYear', cutting_var]).mean().reset_index()
        m = m.pivot(index='ReportingYear', columns=cutting_var, values=delta_type)
        s = X[['ReportingYear', cutting_var, delta_type]].groupby(['ReportingYear', cutting_var]).apply(ut.utils().stdErr).reset_index()
        s = s.pivot(index='ReportingYear', columns=cutting_var, values=delta_type)
        if  delta_type == 'Delta':
            m/=1e6
            s/=1e6
        markers = ['o']*6
        shifts = [-0.375,-0.25,-0.125,0.125,0.25,0.375]
        count = 0
        for i in list(order.keys())[-5:][::-1]:
            tmp1 = m[[i]]
            tmp1.index +=shifts[count]
            A = pd.DataFrame(tmp1.values.tolist(),columns = [i], index = tmp1.index.values)
            B = pd.DataFrame(s[[i]].values.tolist(),columns = [i], index = tmp1.index.values)
            A.plot(yerr = B, ls = '', ms = 10, capsize = 4, c = order[i], marker = 'o', ax = ax, legend = False)
            count+=1
            
        #=================================
        #========= Figure lables ========= 
        #=================================
        if delta_type == 'Delta':
            ax.axhline(y=0,c='k', lw=1, ls='-.')
        else:
            ax.axhline(y=1,c='k', lw=1, ls='-.')
    
        for i in [2014,2015,2016,2017, 2018,2019,2020,2021,2022,2023]:
            ax.axvline(x=i+0.5, c = 'k', ls = '--', lw = 0.8)
        ax.set_xlabel('')
        ax.set_ylabel('Conversion gap , MtCO$_2$e')
        if delta_type == 'GWP_ratio':
            plt.ylabel('Reported over most recent GWP')
        ax.set_xlabel('Reporting year')
        ax.margins(x=0.025)
        ax.spines[['right', 'top']].set_visible(False)
        ax.text(-0.1, 1.1, labels[0], transform=ax.transAxes,
          fontsize=24, fontweight='bold', va='top', ha='right')
    
    
        ##### Total 
        axT = ax.twinx()
        ma = Xunfiltered[['ReportingYear', delta_type]].groupby('ReportingYear').sum()
        ma.columns = ['']
        if delta_type == 'Delta':
            ma/=1e6
        print('Total gap:', ma.cumsum())
        ma.plot(c='k', lw = 2, ls = '-.', marker = 'o', yerr=  sa, capsize = 4, ax = axT,legend=False)
        axT.spines[[ 'top']].set_visible(False)
        axT.set_ylabel('Yearly gap (black dotted line), MtCO$_2$e ')
    
        if save_plot:
            plt.savefig('Figures/Figure2BB.pdf', dpi = 300, bbox_inches = 'tight')
    #================================================
    #================================================
    #================================================
    def FIGURE3(self, db, Z, HORIZON=100, dimension_change='Horizon', GASD='CH4', 
                change_type = 'percentage_change', sector_choice='ALL', panel = ['a', 'b'], LABEL = '', 
                save_plot=False):
        def sector_decomposition(U,  TIT = '', dimchoice = 'HDIFF', dim = 'main_sector', ax = None, add_legend=True):
            if ax == None: ax = plt.subplot()
            H = U.loc[U[dimchoice].dropna().index].reset_index().copy()  
            H = H[H[dimchoice] !=0]
            a = H[['ReportingYear', 'cdp_id', dim]].groupby(['ReportingYear',  dim]).nunique().reset_index()
            a = a.pivot(index='ReportingYear', columns=dim, values='cdp_id')
            a.plot(kind = 'area', stacked=True, lw=2, colormap = 'tab20', alpha = 0.6, ax = ax, legend = add_legend)
            plt.title(TIT)
            plt.margins(x=0)
            plt.xlabel('Reporting year')
            if add_legend:
                plt.legend(loc = 'center left', bbox_to_anchor = (1,0.5), title = '')

        if HORIZON == 100:
            latest_methane = 28.4 ###
        elif HORIZON == 20:
            latest_methane = 81.1 ###
    
    
        A = GWPs.Counterfactual(db, Z, [2014,2015,2016,2017, 2018,2019,2020,2021], 'AR5',horizon=HORIZON) 
        B = GWPs.Counterfactual(db, Z, [2022, 2023], 'AR6', latest_methane,horizon=HORIZON) 
        X = pd.concat((A, B))    
        X['mrg'] = X['cdp_id'].astype(int).astype(str)+X['ReportingYear'].astype(str)+X['Gas']
        X = X.groupby('mrg').last().reset_index(drop=True)
        X = X[X.Gas == GASD]

    
    
        #=== Track changes in time horizon or Assessment Reports
        X = X.sort_values(by = ['cdp_id', 'ReportingYear'])
        if dimension_change == 'Horizon':
            X['H'] = X['GWPH'].apply(lambda x: x.split('=')[1])
            X['HDIFF'] = X[['cdp_id', 'H']].astype(float).groupby('cdp_id').diff()
            choice_variable = 'HDIFF'
            a = X.HDIFF.dropna()
        elif dimension_change == 'AR':
            X['GWPVNUM'] = X['GWPV'].replace(['Second', 'Third', 'Fourth', 'Fifth', 'Sixth'], 
                                             [2,3,4,5,6])
            X = X.sort_values(by = ['cdp_id', 'ReportingYear'])
            X['DeltaREPORT'] = X[['cdp_id', 'GWPVNUM']].astype(float).groupby('cdp_id').diff()       
            choice_variable  ='DeltaREPORT'
            a = X.DeltaREPORT.dropna()

        
        #=======
        ## For reported and counterfactual emission calculate the average % diff
        ## at time of no change in GWP, at time at positive and negative changes
        r = []
        for emission_type in ['Value (tCO2e)', 'Value (Equalised)']:
            S = X.copy()
            S = S.sort_values(by = ['cdp_id', 'ReportingYear'])
            S[emission_type] = S[emission_type].astype(float)
            #=== Take the YoY changes in emission type (reported or counterfactual)
            S['Delta'+GASD] = 100*S[['cdp_id', emission_type]].astype(float).groupby('cdp_id').pct_change()
            #=== Only consecutive years observations
            S['DeltaTIME'] = S[['cdp_id', 'ReportingYear']].astype(float).groupby('cdp_id').diff()
            S = S[S.DeltaTIME == 1]
            #=== Only companies not following standards in year Y
            S = S[S.Delta.abs()>1e-8]
            #===

            #===
            S['mrg']=S['cdp_id'].astype(int).astype(str)+'-'+S['ReportingYear'].astype(int).astype(str)
            S = S.loc[S['Delta'+GASD].dropna().index].reset_index(drop=True)
            S = S[S['Delta'+GASD] < np.inf]            

            #==== Just a few counts
            if dimension_change == 'Horizon':
                a = S.HDIFF.dropna()
            elif dimension_change == 'AR':
                a = S.DeltaREPORT.dropna()
                
            #==== A few statistics
            #M1 = pd.read_csv('local_data/scope1_emissions.csv')
            M1 = aux.full_scope1_emissions()
            companies_in_sample = S.loc[a[a!=0].index].cdp_id.unique()
            print('#=================================================')
            print('Number of observations:', len(S))
            print('Number of companies:', len(S.cdp_id.unique()))
            print('Number of switching companies:', len(companies_in_sample))
            print('Fraction of emissions:', str(100*round(M1[M1.cdp_id.isin(companies_in_sample)]['Scope 1'].sum()/M1['Scope 1'].sum(), 2))+'%')
            print('Proportion of switching companies:', str(100*round(len(companies_in_sample)/len(S.cdp_id.unique()),2))+'%')
            print('#=================================================')
            #==
            idx = list(S.cdp_id.unique())
            for i in idx:
                tmp = S[S.cdp_id == i]
                r.append([tmp[tmp[choice_variable] == 0]['Delta'+GASD].mean(),
                          tmp[tmp[choice_variable] > 0]['Delta'+GASD].mean(),
                          tmp[tmp[choice_variable] < 0]['Delta'+GASD].mean(),
                          emission_type,
                          i])
        if dimension_change == 'Horizon':
            r = pd.DataFrame(r, columns = ['No change ', r'GWP$_{20}$ $\to$ GWP$_{100}$', r'GWP$_{100}$ $\to$ GWP$_{20}$', 'EMTYPE', 'cdp_id'])
            loop_vars = ['No change ', r'GWP$_{20}$ $\to$ GWP$_{100}$', r'GWP$_{100}$ $\to$ GWP$_{20}$']
            resMx,resSx = pd.DataFrame(),pd.DataFrame()
            for i in ['No change ', r'GWP$_{20}$ $\to$ GWP$_{100}$', r'GWP$_{100}$ $\to$ GWP$_{20}$']:
                if change_type == 'percentage_change':
                    A  = r[[i, 'cdp_id']].groupby('cdp_id').pct_change().replace(np.inf, np.nan).dropna().median()
                    resMx = pd.concat((resMx, 100*A))
                    B  = r[[i, 'cdp_id']].groupby('cdp_id').pct_change().replace(np.inf, np.nan).dropna().apply(ut.utils().stdErrBootstrapped)
                    resSx = pd.concat((resSx, 100*B))
                elif change_type == 'first_differences':
                    print('Number of observations in the statistics r (Horizon):', len(r[[i, 'cdp_id']].dropna()))
                    A  = r[[i, 'cdp_id']].groupby('cdp_id').diff().replace(np.inf, np.nan).dropna().median()
                    resMx = pd.concat((resMx, A))
                    B  = r[[i, 'cdp_id']].groupby('cdp_id').diff().replace(np.inf, np.nan).dropna().apply(ut.utils().stdErrBootstrapped)
                    resSx = pd.concat((resSx, B))

        elif dimension_change == 'AR':
            r = pd.DataFrame(r, columns = ['No change ', r'Old AR $\to$ new AR', r'New AR $\to$ old AR', 'EMTYPE', 'cdp_id'])
            loop_vars = ['No change ', r'Old AR $\to$ new AR', r'New AR $\to$ old AR']
            resMx,resSx = pd.DataFrame(),pd.DataFrame()
            for i in ['No change ', r'Old AR $\to$ new AR', r'New AR $\to$ old AR']:
                if change_type == 'percentage_change':
                    A  = r[[i, 'cdp_id']].groupby('cdp_id').pct_change().replace(np.inf, np.nan).dropna().median()
                    resMx = pd.concat((resMx, 100*A))
                    B  = r[[i, 'cdp_id']].groupby('cdp_id').pct_change().replace(np.inf, np.nan).dropna().apply(ut.utils().stdErrBootstrapped)
                    resSx = pd.concat((resSx, 100*B))
                elif change_type == 'first_differences':
                    print('Number of observations in the statistics r (AR):', len(r[[i, 'cdp_id']].dropna()))
                    A  = r[[i, 'cdp_id']].groupby('cdp_id').diff().replace(np.inf, np.nan).dropna().median()
                    resMx = pd.concat((resMx, A))
                    B  = r[[i, 'cdp_id']].groupby('cdp_id').diff().replace(np.inf, np.nan).dropna().apply(ut.utils().stdErrBootstrapped)
                    resSx = pd.concat((resSx, B))


        if dimension_change == 'Horizon': 
            dimchoice = 'HDIFF'
            plot_title = 'Choice of Horizon'
            if HORIZON == 100:
                panel_label = 'a'
            else:
                panel_label = 'd'
        else:
            dimchoice = 'DeltaREPORT'
            plot_title = 'Choice of Assessment Report'
            if HORIZON == 100:
                panel_label = 'c'
            else:
                panel_label = 'f'
        plt.figure()
        ax = plt.subplot()
        sector_decomposition(S, TIT=plot_title,  dimchoice = dimchoice, dim = 'main_sector', ax = ax, add_legend=True)
        if save_plot:
            plt.savefig('Figures/Figure2'+LABEL+'SI'+str(HORIZON)+'.pdf', dpi = 300, bbox_inches = 'tight')
        plt.figure()
        ax = plt.subplot()
        sector_decomposition(S, TIT=plot_title,  dimchoice = dimchoice, dim = 'SP_GEOGRAPHY', ax = ax, add_legend=True)
        if save_plot:
            plt.savefig('Figures/Figure2'+LABEL+'_Region_SI'+str(HORIZON)+'.pdf', dpi = 300, bbox_inches = 'tight')
    
        plt.figure(figsize = (12,10))
        ax2 = plt.subplot()
    
        X['DRS'] = X[choice_variable].apply(np.sign)
        M = S.copy()
        S['ReportingYear'] = S['mrg'].apply(lambda x: int(x.split('-')[1]))
        M['DRS'] = S[choice_variable].apply(np.sign)
        M['mrg']=M['cdp_id'].astype(int).astype(str)+'-'+M['ReportingYear'].astype(int).astype(str)
        #=== With this, you eliminate companies switching to the counterfactual
        M=M[M.mrg.isin(S.mrg.unique())]
        a = M[['ReportingYear', 'cdp_id', 'DRS']].groupby(['ReportingYear', 'DRS']).count().reset_index()
        a = a.pivot(index='ReportingYear', columns='DRS', values= 'cdp_id')
        if dimension_change == 'Horizon':
            plot_cols = [r'GWP$_{100}$ $\to$ GWP$_{20}$', 'No change', r'GWP$_{20}$ $\to$ GWP$_{100}$']
            a.columns = plot_cols  
            a = a[[r'GWP$_{100}$ $\to$ GWP$_{20}$', r'GWP$_{20}$ $\to$ GWP$_{100}$', 'No change']]
        elif dimension_change == 'AR':
            r = pd.DataFrame(r, columns = ['No change', r'Old AR $\to$ new AR', r'New AR $\to$ old AR', 'EMTYPE'])
            plot_cols = [ r'New AR $\to$ old AR', 'No change', r'Old AR $\to$ new AR']
            a.columns = plot_cols   
            a = a[[r'New AR $\to$ old AR',  r'Old AR $\to$ new AR', 'No change']]
        a.drop(columns = ['No change']).plot(kind='area', stacked=True, lw = 3, colormap = 'coolwarm_r', ax = ax2, alpha = 0.6)
        ax2.legend(fontsize = 34)
        ax2.tick_params(axis='both', which='major', labelsize=32)
        ax2.spines[['top']].set_visible(False) 
        ax2.set_xlabel('')
        ax2.margins(x=0)
        ax2.text(-0.1, 1.1, panel_label, transform=ax2.transAxes,
          fontsize=48, fontweight='bold', va='top', ha='right')
        if panel_label in ['a', 'd']:
            ax2.set_ylabel('Number of companies \n with choice change', fontsize = 36, labelpad = 15)
        else:
            ax2.set_ylabel('')

        ax2A = ax2.twinx()
        a['No change'].plot(lw = 2, c= 'k', ls = '--', ax = ax2A, marker = 'o')
        ax2A.margins(x=0)
        ax2A.spines[['top']].set_visible(False) 
        ax2A.set_xlabel('')
        ax2A.tick_params(axis='both', which='major', labelsize=32)

        if panel_label in ['c', 'f']:
            ax2A.set_ylabel('Number of companies \n with no change (dotted line)', fontsize = 36, labelpad = 15)
    
        if save_plot:
            plt.savefig('Figures/Figure2'+LABEL+'2_'+str(HORIZON)+'.pdf', dpi = 300, bbox_inches = 'tight')
        return a, resMx,resSx
    
    def PLOTFIGURE3(self, HRZ,AH, BH, AR, BR, change_type = 'percentage_change', save_plot=False):
        if HRZ == 100:
            panel_label='b'
        else:
            panel_label='e'
        m = pd.concat((pd.DataFrame(AH).transpose().rename(columns = {'No change ': 'Fixed horizon'}), pd.DataFrame(AR).transpose().rename(columns = {'No change ': 'Fixed AR'})))
        s = pd.concat((pd.DataFrame(BH).transpose().rename(columns = {'No change ': 'Fixed horizon'}), pd.DataFrame(BR).transpose().rename(columns = {'No change ': 'Fixed AR'})))
        m.index = ['Change in Horizon', 'Change in Assessment Report']
        s.index = ['Change in Horizon', 'Change in Assessment Report']
        m = m.drop(columns = ['Fixed horizon', 'Fixed AR'])
        s = s.drop(columns = ['Fixed horizon', 'Fixed AR'])
        plt.figure()
        ax = plt.subplot()
        m.plot.bar(yerr = s, capsize = 4, rot = 0, ax = ax, color = ['#e4433f', '#5d86b4', '#bcbcbc', 'orange'])
        ax.spines[['right', 'top']].set_visible(False) 
        if panel_label == 'e':
            ax.set_ylabel('')
        else:
            ax.set_ylabel('Expected emission change \n under counterfactual versus \n realised emission change, %')
        plt.axhline(y=0,ls = '-.', xmax = 0.325, c = 'k', lw= 1)
        plt.axhline(y=0,ls = '-.', xmin = 0.675, c = 'k', lw= 1)
        ax.legend(loc = 'center', bbox_to_anchor = (0.5,0.4))
        props = dict(boxstyle='round', facecolor='#7ac2e0', alpha=0.5)
        plt.text(0.1,250, 'Counterfactual = GWP$_{'+str(HRZ)+'}$', bbox=props)
        ax.set_ylim(-200,340)
        if panel_label == 'e':
            ax.text(0., 1.1, panel_label, transform=ax.transAxes,
                    fontsize=24, fontweight='bold', va='top', ha='right')
        else:
            ax.text(-0.1, 1.1, panel_label, transform=ax.transAxes,
                    fontsize=24, fontweight='bold', va='top', ha='right')

        if save_plot:
            if change_type == 'percentage_change':  
                plt.savefig('Figures/Figure2_main'+str(HRZ)+'.pdf', dpi = 300, bbox_inches = 'tight')
            elif change_type == 'first_differences':
                plt.savefig('Figures/Figure2_main'+str(HRZ)+'_first_differences.pdf', dpi = 300, bbox_inches = 'tight')

    #================================================
    #================================================
    #================================================

    def FIGURE4(self, db, db2, Z, order=-1, GAS='CH4', HORIZON=100, LOGscale = True, save_plot=False):
        #=== The functions for this figure are in carbon_price.py
        CP = cpg.CARBONPRICE()
        #==
        dtm, res = CP.carbon_price_data(db)
        if order == -1:
            print('### Get the right order of sectors for coloring')
            X, order = CP.DistributionBYSECTOR(dtm, Z, res, HORIZON=HORIZON)
        else:
            X, _ = CP.DistributionBYSECTOR(dtm, Z, res, HORIZON=HORIZON)


        M1 = aux.full_scope1_emissions()
        reference_sample = db[(db.GWP_adj.isin(['SAR-20', 'SAR-100', 'TAR-20', 'TAR-100', 'AR4-20', 'AR4-100', 'AR5-20', 'AR5-100', 'AR6-20', 'AR6-100'])) & (db.Gas == 'CH4')].cdp_id.unique()
        subsample_emissions = M1[M1.cdp_id.isin(X.cdp_id.unique())]['Scope 1'].sum()/M1[M1.cdp_id.isin(reference_sample)]['Scope 1'].sum()
        print('European emissions:', str(round(100*subsample_emissions,2))+'%')
        #==
        if LOGscale:
            if HORIZON == 100:
                labs1 = ['a', 'b', 'c']
                labs2 = ['f', 'g', 'h']
                labs3 = ['f']
            if HORIZON == 20:
                labs1 = ['d', 'e']
                labs2 = ['i', 'l', 'm']
                labs3 = ['g']
    
            plt.figure(figsize=(16, 8))
            ax1 = plt.subplot(111)
            econ_impc = CP.PriceGAPCS(X, order, ax1, HRZ=HORIZON, label_panels = labs1, aggregation_method='sum')
            if save_plot:
                plt.savefig('Figures/Figure3A'+str(HORIZON)+'.pdf', dpi = 300, bbox_inches = 'tight')
            #=====
        else:
            labs3=''
        TR = tnr.TRANSITIONRISK()
        tnrisk = TR.TransitionRisk(db2, Z, labs3, order, which_gas =   GAS, LOGscale=LOGscale, HORIZON=HORIZON)
        if save_plot:
            if LOGscale:
                plt.savefig('Figures/Figure3TR'+str(HORIZON)+'.pdf', dpi = 300, bbox_inches = 'tight')
            else:
                plt.savefig('Figures/Figure3TR'+str(HORIZON)+'_SI.pdf', dpi = 300, bbox_inches = 'tight')
        return econ_impc, tnrisk, order
    #================================================
    #==== These figures will go in the SI really ====
    #================================================
    
    #================================================
    #================================================
    #================================================        
    def Figure0_SI(self, db, save_plot=False):
        KyotoGas = ['CO2', 'CH4', 'N2O', 'HFCs', 'PFCs','SF6','NF3' ]
        KyotoGasName = ['CO$_2$', 'CH$_4$', 'N$_2$O', 'HFCs', 'PFCs','SF$_6$','NF$_3$' ]
        count = pd.read_csv('local_data/CDP_respondents.csv').drop(columns = ['Unnamed: 0']).set_index('ReportingYear')
        countGAS = db[db.Gas.isin(KyotoGas)][['cdp_id', 'ReportingYear']].groupby('ReportingYear').nunique()
        c = pd.concat((count, countGAS), axis = 1)
        for K in KyotoGas:
            countK = db[db.Gas == K][['cdp_id', 'ReportingYear']].groupby('ReportingYear').nunique()
            c = pd.concat((c, countK), axis = 1)
        c.columns = ['All firms', 'Breakdown']+KyotoGasName
        c.plot(kind = 'bar', figsize = (18,6), stacked = False, width=0.8,colormap = 'tab20b', alpha = 0.8)
        plt.xlabel('Reporting year')
        plt.ylabel('Number of companies')
        plt.legend(loc = 'center left', bbox_to_anchor = (1,0.5))
        if save_plot:
            plt.savefig('Figures/Figure0SI.pdf', dpi = 300, bbox_inches = 'tight')
    def Figure1_SI(self, db, save_plot=False):
        r = db.copy()
        r['standard'] = ['AR5-100' if r.ReportingYear.iloc[i] < 2022 else 'AR6-100' for i in range(len(r))]
        r['standard_followers'] = [1 if r.GWP_adj.iloc[i] == r.standard.iloc[i] else 0 for i in range(len(r))]
        print('Total number of companies:', len(r.cdp_id.unique()))
        a = r[['ReportingYear', 'main_sector', 'standard_followers']].groupby(['ReportingYear', 'main_sector']).mean()
        a = a.reset_index().pivot(index='ReportingYear', columns='main_sector', values= 'standard_followers')
        a = a.drop(columns = ['Other'])
        a.plot.bar(colormap = 'tab20b', figsize = (14,6))
        plt.axhline(y = a.mean().mean(), c=  'k', lw=2, ls = '--')
        plt.legend(loc = 'center left', bbox_to_anchor = (1,0.5))
        plt.xlabel('Reporting year')
        plt.ylabel('Proportion of companies\n reporting using the \nlatest GWP$_{100}$')
        if save_plot:
            plt.savefig('Figures/Figure1SI.pdf', dpi = 300, bbox_inches = 'tight')


    def FIGURE2_SI(self, dg,Z, sector_type ='main_sector', save_plot=False):
        dk = dg.copy()
        dk['mrg'] = dk['cdp_id'].astype(int).astype(str)+'-'+dk['ReportingYear'].astype(int).astype(str)
        #=== Here you want to be sure that Scope 1 are actually scope 1, without estimations using breakdown for missing data
        #M1 = pd.read_csv('local_data/scope1_emissions.csv')
        M1 = pd.read_csv('local_data/scope1_emissions.csv')
        M1['mrg'] = M1['cdp_id'].astype(int).astype(str)+'-'+M1['ReportingYear'].astype(int).astype(str)
        M2 = aux.get_total_by_gas('Scope_1').rename(columns = {"Scope_1": "Scope 1"})
        M2['mrg'] = M2['cdp_id'].astype(int).astype(str)+'-'+M2['ReportingYear'].astype(int).astype(str)
        M2 = M2[M2.mrg.isin(M1.mrg) == False]
        M1 = pd.concat((M1[['cdp_id', 'ReportingYear', 'Scope 1', 'mrg']],M2[['cdp_id', 'ReportingYear', 'Scope 1', 'mrg']]))
        M1 = M1[M1['Scope 1'] > 0]
        M1 = M1[M1['Scope 1'] < M1['Scope 1'].quantile(0.99)]
        
        dk = dk.merge(M1[['Scope 1', 'mrg']])

        #== Ratio of methane over total Scope 1 emissions
        dk = dk[dk.Gas == 'CH4']
        dk['Gas_contribution'] = dk['Value (tCO2e)'].astype(float)/dk['Scope 1']
        dk = dk[dk['Gas_contribution'] < 1]
        order = dk[[sector_type, 'Gas_contribution']].groupby(sector_type).mean().sort_values(by = 'Gas_contribution')
        dk = dk.set_index(sector_type)
        dk = dk.loc[order.index].reset_index()
        dk = dk[dk[sector_type].isin(['Other']) == False]
        plt.figure(figsize = (12,8))
        ax = plt.subplot()
        sns.boxplot(y =sector_type, x = 'Gas_contribution', palette = 'coolwarm', data = dk, 
                    showfliers = False, boxprops=dict(alpha=.6)) #, showmeans=False,
                    #meanprops={'marker':'o','markerfacecolor':'k','markeredgecolor':'b','markersize':'10'}, ax = ax)
        sns.pointplot(y =sector_type, x = 'Gas_contribution', palette = 'coolwarm', data = dk, capsize=0.25)
                    
        plt.xlabel('Methane emissions over total Scope 1')
        plt.ylabel('')
        average = dk['Gas_contribution'].mean()
        print('Number of observations:', len(dk['Gas_contribution'].dropna()))
        if save_plot:
            plt.savefig('Figures/Figure2SI_Contribution.pdf', dpi = 300, bbox_inches = 'tight')
        return order

    #==== Print out the sector and geography mapping to ensure that there are not mistakes
    def print_out_sector_map_table(self, db):
        dbs, secMAP = smap.get_sector_map_step1(db)
        _, secMAP2 = smap.get_sector_map_step2(dbs)

        A = pd.DataFrame.from_dict(secMAP, orient='index')
        B = pd.DataFrame.from_dict(secMAP2, orient='index')
        C = pd.concat((A, B))
        C = C.groupby(C.index).last()
        C.columns = ['']
        C = C.sort_values(by = ['']).iloc[:-1]
        with pd.option_context("max_colwidth", 1000):
            print (C.to_latex())

    def print_out_geography_map_table(self, db):
        C= db[['SP_GEOGRAPHY' , 'Country_adj']].groupby('Country_adj').last()
        C = C.sort_values(by = 'SP_GEOGRAPHY')
        C.name = ''
        C.columns = ['Geography']
        with pd.option_context("max_colwidth", 1000):
            print (C.to_latex())
    def print_out_example_GWP_sources(self, db):
        a = db[db.GWP_adj.isin(['Other', 'Question not applicable'])]
        #=== Just to remove Japanese characters that are problematic in LaTex
        a = a[a.Country!='Japan']
        a  = a[a.Gas == 'CH4']
        rdn = np.random.choice(a.index, 15)
        t1 = a[['GWP', 'ReportingYear']].loc[rdn]
        t1.columns = ['GWP source', 'Reporting ear']
        with pd.option_context("max_colwidth", 1000):
            print(t1.to_latex(index=False, multicolumn=True, column_format='|p{10cm}|p{2.5cm}|').replace(r'\\', r'\\ \hline')) 
    #======================================================================= 
    #============================= NOT INCLUDED ============================ 
    #======================================================================= 
    def FIGUREXSI(self, db, Z, HORIZON=100,save_plot=False):
        '''
        This is just to try to decompose the gap into the 
        emission and guidelines deviation components
        '''
        if HORIZON == 100: 
            lat_methane = 28.4
        elif HORIZON == 20: 
            lat_methane = 81.1
        print('#======', lat_methane)
        A = GWPs.Counterfactual(db, Z, [2014,2015,2016,2017,2018,2019,2020,2021], 'AR5', horizon=HORIZON) 
        B = GWPs.Counterfactual(db, Z, [2022, 2023], 'AR6',latest_methane=lat_methane, horizon=HORIZON) 
        X = pd.concat((A, B))
        GASTYPE = 'CH4'
        X = X[X.Gas == GASTYPE]
        X['gdiff'] = X['EqualConversionFactor']- X['ConversionFactor']
        res = pd.DataFrame()
        resci = pd.DataFrame()
        for i in X.main_sector.dropna().unique():
            if i != 'Other':
                u = X[['Value (tCO2e)', 'gdiff', 'Delta', 'main_sector']].set_index('main_sector').astype(float).dropna()
                u = u.loc[i]
                u = u.apply(scale)
                x =  u[['Value (tCO2e)', 'gdiff']]
                y = u['Delta']
                lm = OLS(y, x).fit()
                a = pd.DataFrame(lm.params).transpose()
                a.columns = ['Emission', 'Deviation']
                a['Sector'] = [i]
                b = pd.DataFrame((lm.conf_int().diff(axis = 1)/2)[1]).transpose()
                b.columns = ['Emission', 'Deviation']
                b['Sector'] = [i]
            
                res = pd.concat((res, a))
                resci = pd.concat((resci, b))
        res = res.sort_values(by = 'Emission').set_index('Sector')
        resci = resci.set_index('Sector').loc[res.index]
        res.plot.barh(xerr = resci, capsize = 4, figsize = (8,14))
        plt.axvline(x=0, lw=  1, c= 'k', ls = '-.')
        if save_plot:
            plt.savefig('Figures/FigureXSI.pdf', dpi = 300, bbox_inches = 'tight')
        


#=============================================================== 
#============================= END ============================= 
#=============================================================== 
